***************************************************************
* 03_robustness_clean.do
* Afrobarometer Round 8 x GTD – Cleaning & Matching
***************************************************************
//--------------------------------------------------------------
// 03_robustness_clean.do — Robustness cleaning script
// Project: "The Impact of Terrorism on Democratic Support in Africa"
//--------------------------------------------------------------
* Session setup: pin the interpreter version, start from an empty
* session, disable output paging and switch on per-command timing.
version 18.0
clear all
set more off
set rmsg on
* capture: do not abort if the plotplain scheme is unavailable
capture set scheme plotplain

* --------------------------------------------------------------
* Project root
* --------------------------------------------------------------
* Stata does not expose the path of the running do-file: c(filename)
* holds the dataset last named in -use- or -save-, and there is no
* "dirname" extended macro function. The root is therefore derived
* from the working directory, so this file must be run either from
* the project root or from its dofile subfolder.
local here "`c(pwd)'"

* Use forward slashes throughout (Stata accepts them on every OS)
local here : subinstr local here "\" "/", all

* Drop a trailing separator so the suffix test below is reliable
if length("`here'") > 1 & substr("`here'", -1, 1) == "/" {
    local here = substr("`here'", 1, length("`here'") - 1)
}

* Started from inside the dofile folder: the project root is its parent
if substr("`here'", -7, 7) == "/dofile" {
    local here = substr("`here'", 1, length("`here'") - 7)
}

global root "`here'"

* Fail early, before any output folder is created in the wrong place,
* if the first input file is not where the root says it should be
capture confirm file "${root}/Original data/Original_R8.dta"
if _rc {
    display as error "Cannot find ${root}/Original data/Original_R8.dta"
    display as error "cd to the project root (or its dofile folder) and re-run."
    exit 601
}


*--------------------------------------------------------------
* 0. PATHS (all derived from the root global; nothing machine-specific)
*--------------------------------------------------------------

* Inputs and code
global data      "${root}/Original data"
global do        "${root}/dofile"

* Outputs
global out       "${root}/output"
global table     "${out}/tables"
global graph     "${out}/graphs"
global data_new  "${out}/Generated data"

* Create the output tree, parent folder first; errors (for example
* because a folder already exists) are ignored
foreach folder in "${out}" "${table}" "${graph}" "${data_new}" {
    capture mkdir "`folder'"
}

* Close any log left open, then start a fresh named text log
capture log close _all
log using "${out}/03_robustness_clean.log", text replace name(rep)

***************************************************************
* 1. SURVEY DATA (AFROBAROMETER R8) – LOCALITY & DATES
***************************************************************

use "${data}/Original_R8.dta", clear

*--------------------------------------------------------------
* 1.1 Interview date and day of fieldwork within country
*--------------------------------------------------------------
* Working copy of the interview date, displayed as a daily date
generate intervdate = DATEINTR
format %td intervdate
label variable intervdate "Interview date (numeric Stata date)"

* Overall fieldwork window (earliest and latest interview in the file)
summarize intervdate if intervdate < .
scalar field_start = r(min)
scalar field_end   = r(max)

display "Fieldwork start: " %td field_start
display "Fieldwork end:   " %td field_end

* Day count since the first interview in the respondent's country;
* the helper holding the country minimum is dropped once used
egen min_intervdate = min(intervdate), by(COUNTRY)
generate time_interviewed = (intervdate - min_intervdate) + 1 if intervdate < .
drop min_intervdate
label variable time_interviewed "Day of fieldwork within country (1 = first day)"


***************************************************************
* 1.4 IDs AND STRINGS FOR MATCHING (R8)
***************************************************************
* Country as a labelled numeric (country_name) and as text (country1).
* -encode- only accepts string input, so a COUNTRY that is already a
* labelled numeric is cloned instead of encoded.
capture confirm string variable COUNTRY
if !_rc {
    encode COUNTRY, generate(country_name)
}
else {
    clonevar country_name = COUNTRY
}
decode country_name, generate(country1)

* Unique respondent ID (row number at this point), stored as long
generate long id1 = _n

* Normalise the matching strings: lower case, no leading/trailing blanks.
* locality1 is expected to exist already in Original_R8.dta.
replace locality1 = trim(lower(locality1))
replace country1  = trim(lower(country1))

* Combined "locality country" key used for the fuzzy match
generate locality_country1 = locality1 + " " + country1

sort id1 COUNTRY
save "${data_new}/R8_c.dta", replace

***************************************************************
* 2. GTD – EVENT DATES AND LOCALITY STRINGS
***************************************************************

use "${data}/Original_gtd.dta", clear

* Event date; mdy() returns missing when month/day/year do not form
* a valid calendar date, so such events get a missing eventdate
generate edate = mdy(imonth, iday, iyear)
generate eventdate = edate
format %td eventdate
label variable eventdate "Event date (GTD)"

* Unique event ID (row number), stored as long, and numeric country code
generate long id2 = _n
encode country_txt, generate(cntrynum)

* Matching key. locality2 and country2 are expected to exist already in
* Original_gtd.dta (earlier, now disabled, lines derived them as
*   gen locality2 = provstate
*   gen country2  = country_txt ).
* The key is built from lower-cased, trimmed copies so that it is
* normalised the same way as locality_country1 on the survey side;
* locality2 and country2 themselves are left unchanged.
generate locality_country2 = trim(lower(locality2)) + " " + trim(lower(country2))

sort id2 country_txt

save "${data_new}/gtd_c.dta", replace

***************************************************************
* 3. FUZZY MATCHING – matchit ON locality+country
***************************************************************

*--------------------------------------------------------------
* 1. Master side: one row per distinct locality_country1 in R8
*--------------------------------------------------------------
* Load only the match string, then collapse to distinct values
use locality_country1 using "${data_new}/R8_c.dta", clear
duplicates drop locality_country1, force

* Numeric row ID for the master side of matchit
generate locid1 = _n

save "${data_new}/R8_loc_unique.dta", replace


*--------------------------------------------------------------
* 2. Using side: GTD match strings
*--------------------------------------------------------------
* Unlike the R8 file, no duplicates are dropped here: every GTD event
* keeps its own row, so locid2 identifies an event rather than a
* distinct locality string.
use id2 locality_country2 using "${data_new}/gtd_c.dta", clear

* Numeric row ID for the using side of matchit
generate locid2 = _n

save "${data_new}/gtd_loc_unique.dta", replace


*--------------------------------------------------------------
* 3. Fuzzy match R8 localities against the GTD strings
*--------------------------------------------------------------
* matchit depends on freqindex, which is a separate SSC package, so
* both are checked and installed from SSC when missing
foreach pkg in matchit freqindex {
    capture which `pkg'
    if _rc ssc install `pkg'
}

use "${data_new}/R8_loc_unique.dta", clear

* Candidate pairs are kept when their similarity score reaches 0.9;
* the score is returned in similscore
matchit locid1 locality_country1 using "${data_new}/gtd_loc_unique.dta", ///
    idusing(locid2) txtusing(locality_country2) threshold(0.9)

save "${data_new}/loc_match.dta", replace

*--------------------------------------------------------------
* 4. Bring id2 back to locality level, keep BEST match per locality
*--------------------------------------------------------------
use "${data_new}/loc_match.dta", clear

* Attach the GTD event ID (id2) that belongs to each locid2
merge m:1 locid2 using "${data_new}/gtd_loc_unique.dta", keep(match) nogenerate

* Keep one candidate per R8 locality: the highest similscore, and among
* equal scores the highest id2. Sorting on similscore alone leaves tied
* rows in random order, so the event kept could differ between runs.
* NOTE(review): many GTD events can share one locality string; confirm
* that "highest id2" is the event the analysis should use.
bysort locid1 (similscore id2): keep if _n == _N

* Keep only what is needed to merge back to individuals
keep locality_country1 locality_country2 id2 similscore

save "${data_new}/loc_to_id2.dta", replace


*--------------------------------------------------------------
* 5. Merge event ID back to full R8 data
*--------------------------------------------------------------
use "${data_new}/R8_c.dta", clear

* Every respondent in a locality receives that locality's single id2;
* respondents in unmatched localities are kept (id2 stays missing),
* rows found only in the lookup file are discarded
merge m:1 locality_country1 using "${data_new}/loc_to_id2.dta", ///
    keep(1 3) nogenerate

* Respondent file now carrying the matched GTD event ID (id2)
save "${data_new}/R8_with_id2.dta", replace


***************************************************************
* 4. FINAL MERGE: R8 + GTD EVENTS
***************************************************************

* Respondents with their matched event ID
use "${data_new}/R8_with_id2.dta", clear

* Attach the GTD variables of the matched event; only respondents
* whose id2 is found in the GTD file are kept
merge m:1 id2 using "${data_new}/gtd_c.dta", keep(3) nogenerate

save "${data_new}/finaldata_merge.dta", replace


* Signed gap in days: positive when the interview follows the event
generate tdiff = intervdate - eventdate if !missing(intervdate, eventdate)
label variable tdiff "Days between interview and matched event"
summarize tdiff

* 1 when interviewed strictly after the event, 0 otherwise,
* missing when the gap is unknown
generate time_zero = (tdiff > 0) if tdiff < .
label variable time_zero "Interview after matched event (1 = yes)"

* Survey year (for FE): created only when not already in the data
capture confirm variable surveyyear
if _rc != 0 {
    generate surveyyear = year(DATEINTR)
    label variable surveyyear "Survey year (from DATEINTR)"
}

* 'year' for country-year merges: created only when not already in the
* data. surveyyear is guaranteed to exist by now, so it is the source.
capture confirm variable year
if _rc != 0 {
    generate year = surveyyear
}

* Numeric country ID: created only when not already in the data
capture confirm variable cntrynum
if _rc != 0 {
    encode country, gen(cntrynum)
}

save "${data_new}/finaldata2.dta", replace
***************************************************************
* 7. DEMOGRAPHICS – R8 CODING
***************************************************************

* Age (Q1): the non-response codes are blanked in Q1 itself and age
* is a labelled copy. -1 is treated as missing here as well, as it
* is in the other recodes of this file.
tab Q1, nolabel
replace Q1 = . if inlist(Q1, -1, 998, 999)
clonevar age = Q1
label var age "Respondent's age"

* Gender (Q101): 1 -> 0 (Male), 2 -> 1 (Female), -1 and 9999 -> missing
recode Q101 (1 = 0) (2 = 1) (-1 9999 = .), gen(gender)
label define gender_lbl 0 "Male" 1 "Female", replace
label values gender gender_lbl
label var gender "Gender of respondent"
tab gender

* Race (Q102 -> race; plus grouped race_group)
* Codes 1-5 are kept, 6 is folded into 5, 9995 becomes 6 ("Other").
* -1 and 9999 are set to missing; -1 was previously left in place and
* would have carried through into race_group.
recode Q102 ///
    (6 = 5) ///
    (9995 = 6) ///
    (-1 9999 = .), gen(race)

label define race_lbl ///
    1 "Black / African" ///
    2 "White / European" ///
    3 "Coloured / Mixed race" ///
    4 "Arab / Lebanese / North African" ///
    5 "Asian (South & East Asian)" ///
    6 "Other", replace
label values race race_lbl
label var race "Race of respondent (grouped)"

* Three-way grouping: categories 3 to 6 are pooled as "Others"
recode race (1 = 1) (2 = 2) (3/6 = 3), gen(race_group)
label define race_group_lbl ///
    1 "Black / African" ///
    2 "White / European" ///
    3 "Others", replace
label values race_group race_group_lbl
tab race_group

* Religion (Q98A), collapsed to five groups. One rule per target
* group; the source codes inside each rule are exactly those that were
* previously listed one per line.
recode Q98A ///
    (18/24 = 1) ///
    (1 2 100 = 2) ///
    (3 4 5/17 30/33 260 261 300 340 460 462 463 742 820 821 1751/1757 = 3) ///
    (25 = 4) ///
    (0 26 27 28 29 34 660 900 1260 1340 1750 1758 9995 = 5) ///
    (-1 9998 9999 = .), generate(religion)

label define religion_lbl ///
    1 "Muslim" ///
    2 "Christian" ///
    3 "Protestant (other Christian)" ///
    4 "Traditional/Ethnic religion" ///
    5 "Others (incl. None)", replace
label values religion religion_lbl
label variable religion "Religion of respondent"
tabulate religion

* Urban–rural (URBRUR): code 3 is pooled with 2 ("Rural"); every other
* value, including 1 ("Urban"), is copied unchanged
recode URBRUR (2 3 = 2), generate(urban_rural)
label define urban_rural_lbl 1 "Urban" 2 "Rural", replace
label values urban_rural urban_rural_lbl
label variable urban_rural "Urban-Rural"

* Ethnic group (Q81): the listed special codes become missing, all
* other codes are kept as they are
recode Q81 (-1 9990 9994 9995 9998 9999 = .), generate(ethnic)
label variable ethnic "Ethnic group of respondent"

* Education (Q97 -> recoded_education, educ_group)
* Substantive codes 0-9 are copied unchanged; -1, 98 and 99 become missing
recode Q97 (-1 98 99 = .), generate(recoded_education)

label define education_lbl ///
    0 "No formal schooling" ///
    1 "Informal schooling only" ///
    2 "Some primary schooling" ///
    3 "Primary school completed" ///
    4 "Some secondary/high school" ///
    5 "Secondary/high school completed" ///
    6 "Post-secondary, non-university" ///
    7 "Some university" ///
    8 "University completed" ///
    9 "Post-graduate", replace
label values recoded_education education_lbl
label variable recoded_education "Re-coded education levels"

* Four-level grouping of the ten detailed levels
recode recoded_education ///
    (0 1 2 = 1) ///
    (3 4   = 2) ///
    (5 6 7 = 3) ///
    (8 9   = 4), generate(educ_group)
label define educ_group_lbl ///
    1 "Below primary" ///
    2 "Primary" ///
    3 "Secondary" ///
    4 "University", replace
label values educ_group educ_group_lbl
label variable educ_group "Education level"
tabulate educ_group

* Employment status (Q95A)
* Codes 0-3 are copied unchanged; -1, 8 and 9 become missing
recode Q95A (-1 8 9 = .), generate(recoded_employment)

label define employment_status_lbl ///
    0 "No (not looking)" ///
    1 "No (looking)" ///
    2 "Yes, part time" ///
    3 "Yes, full time", replace
label values recoded_employment employment_status_lbl
label variable recoded_employment "Re-coded employment status"

* Three-level grouping: both "No" answers are pooled as not employed,
* part-time (2) and full-time (3) keep their codes
recode recoded_employment (0 1 = 1), generate(emp_group)
label define emp_group_lbl ///
    1 "Not employed" ///
    2 "Part-time" ///
    3 "Full-time", replace
label values emp_group emp_group_lbl
label variable emp_group "Employment status"
tabulate emp_group

* Safety walking in neighbourhood (Q8A)
* Codes 0-4 are copied unchanged; 8, 9 and -1 become missing.
* -1 was previously left in place and would have carried through into
* safety_group and safety as an unlabelled category.
recode Q8A (-1 8 9 = .), gen(recoded_Q8A)

label define unsafe_walk_lbl ///
    0 "Never" ///
    1 "Just once or twice" ///
    2 "Several times" ///
    3 "Many times" ///
    4 "Always", replace
label values recoded_Q8A unsafe_walk_lbl
label var recoded_Q8A "Feeling unsafe walking in neighbourhood"

* Three-level grouping of the five frequencies
recode recoded_Q8A (0 1 = 1) (2 = 2) (3 4 = 3), gen(safety_group)
label define safety_group_lbl ///
    1 "Never or rarely" ///
    2 "Occasionally" ///
    3 "Frequently", replace
label values safety_group safety_group_lbl
label var safety_group "Safety status"

* Plain (unlabelled) copy of the grouped variable
gen safety = safety_group

* Fear crime at home (Q8B)
* Codes 0-4 are copied unchanged; 8, 9 and -1 become missing.
* -1 was previously left in place and would have carried through into
* fearing_crime as an unlabelled category.
recode Q8B (-1 8 9 = .), gen(recoded_Q8B)

label define fear_crime_home_lbl ///
    0 "Never" ///
    1 "Just once or twice" ///
    2 "Several times" ///
    3 "Many times" ///
    4 "Always", replace
label values recoded_Q8B fear_crime_home_lbl
label var recoded_Q8B "Fearing crime at home (original)"

* Three-level grouping of the five frequencies
recode recoded_Q8B (0 1 = 1) (2 = 2) (3 4 = 3), gen(fearing_crime)
label define fearing_crime_lbl ///
    1 "Never or rarely" ///
    2 "Occasionally" ///
    3 "Frequently", replace
label values fearing_crime fearing_crime_lbl
label var fearing_crime "Fearing crime at home"
tab fearing_crime

* Discussing politics (Q9)
* Codes 0-2 are copied unchanged; 8, 9 and -1 become missing.
* -1 was previously left in place and would have carried through into
* discuss_politics as an unlabelled category.
recode Q9 (-1 8 9 = .), gen(recoded_Q9)

label define politics_lbl ///
    0 "Never" ///
    1 "Occasionally" ///
    2 "Frequently", replace
label values recoded_Q9 politics_lbl
label var recoded_Q9 "Discussing politics (original)"

* Same three categories under a new name with longer labels
recode recoded_Q9 (0 = 0) (1 = 1) (2 = 2), gen(discuss_politics)
label define discuss_politics_lbl ///
    0 "Never discuss politics" ///
    1 "Occasionally discuss politics" ///
    2 "Frequently discuss politics", replace
label values discuss_politics discuss_politics_lbl
label var discuss_politics "Frequency of discussing politics (grouped)"
tab discuss_politics

* EA-level infrastructure
* Code 9 is blanked in the source variables; -1 is blanked too, in
* line with the other recodes of this file
foreach v of varlist EA_FAC_C EA_SEC_B EA_SVC_B {
    replace `v' = . if inlist(`v', -1, 9)
}

misstable summarize EA_FAC_C EA_SEC_B EA_SVC_B

* 0/1 indicators. The if-qualifier keeps the indicator missing when the
* source is missing; without it (x == 1) evaluates to 0 for a missing
* x, which would silently turn "unknown" into "not present".
gen police_station = (EA_FAC_C == 1) if !missing(EA_FAC_C)
gen soldiers_army  = (EA_SEC_B == 1) if !missing(EA_SEC_B)
gen piped_water    = (EA_SVC_B == 1) if !missing(EA_SVC_B)

label var police_station "Police station in town/village"
label var soldiers_army  "Soldiers/army in town/village"
label var piped_water    "Piped water system in town/village"

***************************************************************
* 8. OUTCOME VARIABLES – DEMOCRACY & AUTHORITARIANISM (R8)
***************************************************************

* Support for democracy (Q21)
* Codes 1-3 are copied unchanged; 8, 9 and -1 become missing.
* -1 was previously left in place and would have carried through into
* support_democracy_group (and demo_support) as an unlabelled value.
recode Q21 (-1 8 9 = .), gen(recoded_Q21)

label define democracy_support_lbl ///
    1 "It doesn't matter what kind of government" ///
    2 "Non-democratic government can be preferable" ///
    3 "Democracy is preferable", replace
label values recoded_Q21 democracy_support_lbl
label var recoded_Q21 "Support for democracy (original)"

* Two groups: indifferent or non-democratic (1 2) versus democracy (3)
recode recoded_Q21 (1 2 = 1) (3 = 2), gen(support_democracy_group)
label define support_democracy_group_lbl ///
    1 "Against democracy or indifferent" ///
    2 "Support democracy", replace
label values support_democracy_group support_democracy_group_lbl
label var support_democracy_group "Support for democracy (grouped)"

* Extent of democracy (Q36)
* Codes 1-4 are copied unchanged; 8, 9, 98 and -1 become missing.
* -1 was previously left in place and would have carried through into
* democracy_group (and demo_rated) as an unlabelled value.
recode Q36 (-1 8 9 98 = .), gen(recoded_Q36)

label define democracy_extent_lbl ///
    1 "Not a democracy" ///
    2 "A democracy with major problems" ///
    3 "A democracy with minor problems" ///
    4 "A full democracy", replace
label values recoded_Q36 democracy_extent_lbl
label var recoded_Q36 "Extent of democracy (original)"

* Three groups: the two "with problems" answers are pooled
recode recoded_Q36 (1 = 1) (2 3 = 2) (4 = 3), gen(democracy_group)
label define democracy_group_lbl ///
    1 "Not a democracy" ///
    2 "Democracy with problems" ///
    3 "Full democracy", replace
label values democracy_group democracy_group_lbl
label var democracy_group "Extent of democracy (grouped)"

* Authoritarian alternatives (Q20A/B/C)
* The three items share one coding, so they are processed in a loop
* (same order as before: military, one-party, one-man). For each item:
*   recoded_<item>          : codes 1-5 copied, 8, 9 and -1 -> missing
*   rejection_<regime>_rule : reversed 3-point scale
*                             (1 2 -> 3 reject, 3 -> 2 neutral, 4 5 -> 1 support)
* -1 was previously left in place; it would have passed through both
* recodes and lowered the row mean used for the auth_support composite.
local q_military    Q20B
local q_one_party   Q20A
local q_one_man     Q20C
local txt_military  "military"
local txt_one_party "one-party"
local txt_one_man   "one-man"

foreach regime in military one_party one_man {
    local q   `q_`regime''
    local txt `txt_`regime''

    * Five-point approval item with non-response blanked
    recode `q' (-1 8 9 = .), gen(recoded_`q')

    label define `regime'_rule_rejection_lbl ///
        1 "Strongly disapprove" ///
        2 "Disapprove" ///
        3 "Neither approve nor disapprove" ///
        4 "Approve" ///
        5 "Strongly approve", replace
    label values recoded_`q' `regime'_rule_rejection_lbl
    label var recoded_`q' "Rejection of `txt' rule (original)"

    * Reversed and collapsed so that higher values mean rejection
    recode recoded_`q' (1 2 = 3) (3 = 2) (4 5 = 1), gen(rejection_`regime'_rule)

    label define rejection_`regime'_rule_lbl ///
        1 "Support `txt' rule" ///
        2 "Neutral" ///
        3 "Reject `txt' rule", replace
    label values rejection_`regime'_rule rejection_`regime'_rule_lbl
    label var rejection_`regime'_rule "Rejection of `txt' rule (grouped)"
}

***************************************************************
* 9. COMPOSITE & FINAL VARIABLES
***************************************************************

* Composite: row mean of the three grouped rejection items
* (rowmean averages over whichever of the three are non-missing)
egen auth_support = rowmean(rejection_military_rule rejection_one_party_rule rejection_one_man_rule)
label variable auth_support "Rejection of authoritarian alternatives"
summarize auth_support

* Short-named copies of the main dependent variables
generate demo_support = support_democracy_group
generate demo_rated   = democracy_group

* Outcome list stored in a global macro for the robustness regressions
global outcomes_groups demo_support auth_support

***************************************************************
* 10. SAVE CLEANED ROBUSTNESS DATA
***************************************************************
* NOTE(review): this generated file is written to the original-data
* folder rather than the generated-data folder; the path is left as it
* is because other do-files may read it from here. Confirm.
save "${data}/R8_final.dta", replace

* Close the named log opened at the top of this do-file so it is not
* left open in the session after the run
log close rep
***************************************************************
* END OF DO-FILE
***************************************************************
